# Data handling, numerics and static plotting libraries.
# NOTE(review): np, gg, stats and integrate are not referenced in the visible
# part of this file; they may be used further down.
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import plotnine as gg
from scipy import stats, integrate
# Apply seaborn's default theme; color_codes=True lets matplotlib shorthand
# color codes resolve to the seaborn palette.
sns.set(color_codes=True)
# IPython magic, not plain Python: this line only works when the file is run
# inside an IPython/Jupyter session.
%matplotlib inline
# Default size for matplotlib figures created from here on.
matplotlib.rcParams['figure.figsize'] = (12,8)
# Plotly (interactive plots). `go` and `iplot` are used by drawworld below.
import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Enable plotly's offline rendering inside the notebook.
init_notebook_mode(connected=True)
from plotly import __version__
# cufflinks adds the DataFrame.iplot accessor used for the spread plots below.
import cufflinks as cf
# Column-wise standardisation helper used to build the scaled `data` frame.
from sklearn.preprocessing import scale
# Read the three yearly report tables, then the multi-year model data.
hp15, hp16, hp17 = (
    pd.read_csv(_csv_path)
    for _csv_path in (
        "./world-happiness-report/2015.csv",
        "./world-happiness-report/2016.csv",
        "./world-happiness-report/2017.csv",
    )
)
raw = pd.read_csv("final_model_data.csv")

# Key every yearly table by country so the tables can be joined on the index.
hp15, hp16, hp17 = (
    _report.set_index('Country') for _report in (hp15, hp16, hp17)
)

# The 2017 file uses dotted column names; rename them to the spaced names
# used when reading the other yearly tables below.
hp17 = hp17.rename(
    columns={
        'Whisker.high': 'Upper Confidence Interval',
        'Whisker.low': 'Lower Confidence Interval',
        'Happiness.Score': 'Happiness Score',
        'Economy..GDP.per.Capita.': 'Economy (GDP per Capita)',
        'Health..Life.Expectancy.': 'Health (Life Expectancy)',
        'Trust..Government.Corruption.': 'Trust (Government Corruption)',
        'Dystopia.Residual': 'Dystopia Residual',
    }
)

# Attach a Region column to the 2017 table by borrowing it from 2016
# (index-aligned join on country).
hp17 = hp17.join(hp16.Region)
# Expose the 'Life Ladder' column under the name used by the yearly reports.
raw['Happiness Score'] = raw['Life Ladder']

# 2016 slice of the model data, keyed by country.
data16 = raw[raw.year == 2016].set_index('country')

# Per-indicator 2016 series with missing values removed.
gdp16, social16, healthy, freedom16, corruption16 = (
    data16[_indicator].dropna()
    for _indicator in (
        'Log GDP per capita',
        'Social support',
        'Healthy life expectancy at birth',
        'Freedom to make life choices',
        'Perceptions of corruption',
    )
)

# The same indicators across every year in the model data.
(
    hpall,
    gdpall,
    socialall,
    freedomall,
    healthyall,
    corruptionall,
    generosityall,
) = (
    raw[_indicator].dropna()
    for _indicator in (
        'Happiness Score',
        'Log GDP per capita',
        'Social support',
        'Freedom to make life choices',
        'Healthy life expectancy at birth',
        'Perceptions of corruption',
        'Generosity',
    )
)

# Complete 2016 rows only; keep the unscaled score so it can be restored
# after every column has been passed through sklearn's scale().
data16_new = data16.dropna()
data16_new_hp = data16_new['Happiness Score']
data = pd.DataFrame(
    scale(data16_new),
    index=data16_new.index,
    columns=data16_new.columns,
)
data['Happiness Score'] = data16_new_hp

# Order countries from lowest to highest happiness score.
data = data.sort_values(['Happiness Score'], ascending=True)
# One distribution plot per indicator, laid out on a 4x2 grid (7 of 8 slots).
sns.set(rc={'figure.figsize': (15, 15)})
_distribution_series = (
    hpall,
    gdpall,
    socialall,
    freedomall,
    healthyall,
    corruptionall,
    generosityall,
)
for _slot, _series in enumerate(_distribution_series, start=1):
    plt.subplot(4, 2, _slot)
    sns.distplot(_series)
# Interactive spread plots of the scaled 2016 data: happiness score against
# GDP and social support, then against freedom of choice.
for _spread_columns in (
    ['Happiness Score', 'Log GDP per capita', 'Social support'],
    ['Happiness Score', 'Freedom to make life choices'],
):
    cf.go_offline()
    data[_spread_columns].iplot(kind='spread')
def drawworld(df, year):
    """Display an interactive world choropleth of happiness scores.

    Args:
        df: DataFrame whose index holds country names (used as the map
            locations and hover text) and which has a 'Happiness Score'
            column (used as the color value).
        year: Year appended to the figure title; converted with ``str()``.

    Returns:
        None. The figure is rendered with plotly's offline ``iplot``.
    """
    # Every stop must be a well-formed rgb() string. The final stop used to
    # have an empty red channel ("rgb(, 0, 200)"), which is not a valid color.
    # NOTE(review): 0 is assumed for the missing channel; confirm the
    # intended shade.
    colorscale = [
        [0, "rgb(200, 50, 0)"],
        [0.85, "rgb(40,0, 190)"],
        [0.9, "rgb(70, 0, 245)"],
        [0.94, "rgb(0, 0, 10)"],
        [0.97, "rgb(106, 0, 247)"],
        [1, "rgb(0, 0, 200)"],
    ]

    # Named `trace` rather than `data` so it does not shadow the
    # module-level `data` frame.
    trace = dict(
        type='choropleth',
        locations=df.index,
        locationmode='country names',
        colorscale=colorscale,
        z=df['Happiness Score'],
        text=df.index,
        colorbar={'title': 'Happiness Score'},
    )
    layout = dict(
        title='World Happiness Score in ' + str(year),
        geo=dict(showframe=False, projection={'type': 'Mercator'}),
    )

    choromap3 = go.Figure(data=[trace], layout=layout)
    iplot(choromap3)
# World maps for the 2016 and 2017 reports.
for _report, _report_year in ((hp16, 2016), (hp17, 2017)):
    drawworld(_report, _report_year)
# Tag each yearly table (in place) with its year as a string label.
for _report, _year_label in ((hp15, '2015'), (hp16, '2016'), (hp17, '2017')):
    _report['Year'] = _year_label

# Stack the columns shared by all three years into one long table.
_stacked_columns = ['Happiness Score', 'Region', 'Year']
hp151617 = pd.concat(
    [_report[_stacked_columns] for _report in (hp15, hp16, hp17)]
)

# Happiness score per region, one box per year.
sns.set(font_scale=2)
fig, axes = plt.subplots(figsize=(20, 14))
sns.boxplot(y='Region', x='Happiness Score', hue='Year', data=hp151617)
# Pairwise regression plots (KDE on the diagonal) over rows where the score
# and all five indicators are present.
_pairplot_columns = [
    'Life Ladder',
    'Log GDP per capita',
    'Social support',
    'Healthy life expectancy at birth',
    'Freedom to make life choices',
    'Perceptions of corruption',
]
raw_cor = raw[_pairplot_columns].dropna()
sns.set(rc={'figure.figsize': (15, 15)})
sns.pairplot(raw_cor, kind="reg", diag_kind="kde")
# Rename the key column in place so it matches the yearly reports, then
# index by it and pull each country's Region from the 2015 report.
raw.rename(columns={'country': 'Country'}, inplace=True)
raw = raw.set_index('Country').join(hp15.Region)
# Bare expression: shows the first rows when run as the last line of a
# notebook cell.
raw.head()
# 3x2 grid of box plots: one indicator per panel, grouped by region.
sns.set(rc={'figure.figsize': (30, 30)})

# (column plotted on the x axis, panel title), in panel order.
_region_panels = (
    ('Happiness Score', "Happiness Score By Region"),
    ('Log GDP per capita', "Log GDP per capita By Region"),
    ('Social support', "Social support By Region"),
    ('Healthy life expectancy at birth', "Healthy life expectancy at birth By Region"),
    ('Freedom to make life choices', "Freedom By Region"),
    ('Perceptions of corruption', "Corruption By Region"),
)

for _slot, (_column, _panel_title) in enumerate(_region_panels, start=1):
    plt.subplot(3, 2, _slot)
    # Re-applied for every panel, after the subplot has been created.
    sns.set(font_scale=3)
    ax = sns.boxplot(x=_column, y='Region', data=raw)  # draw the box plot
    ax.set(title=_panel_title, xlabel="", ylabel="")
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
    ax.tick_params(labelsize=16)
# Bar charts of log GDP per capita by year, one facet per region, wrapped
# three facets per row.
sns.set(rc={'figure.figsize': (200, 200)})
ax = sns.factorplot(
    x="year",
    y="Log GDP per capita",
    col='Region',
    col_wrap=3,
    data=raw,
    kind="bar",
)
# `ax` holds the object returned by factorplot; rotate its x tick labels.
ax.set_xticklabels(rotation=90)